Load R packages

##Seurat, dpylr, reshape2, ggplot2, patchwork, cowplot needed

#install.packages("Seurat")
library(Seurat)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)
#library(SeuratData)

Load in seurat objects

Set Default Assays to “RNA” for all datasets

## Data Integration - Standard Workflow
DefaultAssay(p01.counts) <- "RNA"
DefaultAssay(p02.counts) <- "RNA"
DefaultAssay(p10.counts) <- "RNA"
DefaultAssay(norm.brca.integrated.epithelial) <- "RNA"

Subset Norm/BRCA Dataset to Epithelial Cells

#SplitObject command will generate subsets of a Seurat object in list form based on the given metadata column. For this, we subset on the "Compartment" column, to generate two new objects, the epithelial compartment and the stromal compartment
norm.brca.subset.objects<-SplitObject(norm.brca.integrated.epithelial,"Compartment")

#We now seperate out each component of the list into seperate Seurat objects. The "$" command specifies which part of the full list we want to access.
norm.brca.epithelial.object<-norm.brca.subset.objects$EPITHELIAL
norm.brca.stromal.object<-norm.brca.subset.objects$STROMAL

Subset Norm/BRCA Dataset by Individual

#SplitObject command will generate subsets of a Seurat object in list form based on the given metadata column. For this, we subset on the "individual" column, to generate 6 new objects, for each patient. 
norm.brca.epithelial.individual.objects<-SplitObject(norm.brca.epithelial.object,"individual")

#We now seperate out each component of the list into seperate Seurat objects. The "$" command specifies which part of the full list we want to access.
norm.brca.epithelial.individual.1.objects<-norm.brca.epithelial.individual.objects$ind1
norm.brca.epithelial.individual.2.objects<-norm.brca.epithelial.individual.objects$ind2
norm.brca.epithelial.individual.3.objects<-norm.brca.epithelial.individual.objects$ind3
norm.brca.epithelial.individual.4.objects<-norm.brca.epithelial.individual.objects$ind4
norm.brca.epithelial.individual.9.objects<-norm.brca.epithelial.individual.objects$ind9
norm.brca.epithelial.individual.10.objects<-norm.brca.epithelial.individual.objects$ind10

Generate a list of Seurat objects (PDX models and Norm/BRCA Individuals) to perform the standard workflow integration on

#To construct a reference, we will identify ‘anchors’ between the individual datasets. First, we combine each Seurat object into a list, with each dataset as an element.
#The list() function groups elements together in the form of list("X1"=Y1,"X2",Y2,...), where Xn is the name you want the list element to be called and Yn is the component you want added to the list. For this analysis, we want to keep track of which Seurat object belongs to which model/patient.
#first part 
standard.workflow.object.list <- 
#list("hci001"=p01.cc.updated,"hci002"=p02.updated,"hci010"=p10.updated)
list("hci001"=p01.counts,"hci002"=p02.counts,"hci010"=p10.counts,"n_patient1"=norm.brca.epithelial.individual.1.objects, "b_patient2"=norm.brca.epithelial.individual.2.objects, "b_patient3"=norm.brca.epithelial.individual.3.objects, "b_patient4" = norm.brca.epithelial.individual.4.objects, "n_patient9" = norm.brca.epithelial.individual.9.objects, "n_patient10"=norm.brca.epithelial.individual.10.objects )

Preprocessing before finding anchors by normalizing the data and identifying variable features

#Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat v3 implements an improved method for variable feature selection based on a variance stabilizing transformation ("vst")
for (i in 1:length(standard.workflow.object.list)) {
    standard.workflow.object.list[[i]] <- NormalizeData(standard.workflow.object.list[[i]], verbose = TRUE)
    standard.workflow.object.list[[i]] <- FindVariableFeatures(standard.workflow.object.list[[i]], selection.method = "vst", 
        nfeatures = 2000, verbose = TRUE)
}

Find the integration anchors for the PDX and Norm/BRCA datasets

#Next we find anchors, which are pairwise correspondants between individual cells which originate from the same biological state. These anchors are then used to transfer infromation from one dataset to another
reference.list <- standard.workflow.object.list
integration.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)

Integration of Data using the defined anchors

#After 
#if hitting vector limit issue, open terminal, set cd to home directory, use "open .Renviron" and set the R_MAX_VSIZE=20Gb and save the file, you need to restart R for these changes to be saved 
integrated.data <- IntegrateData(anchorset = integration.anchors, dims = 1:30)
#save(integrated.data, file = "/Users/paigehalas/Desktop/integrated.data.Rda")

Load Integrated Data

load("/Users/paigehalas/Desktop/integrated.data.Rda")

Load Libraries for Visualization of Data

#load necessary packages for visualization 
library(ggplot2)
library(cowplot)
## 
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
##   default ggplot2 theme anymore. To recover the previous
##   behavior, execute:
##   theme_set(theme_cowplot())
## ********************************************************
library(patchwork)
## 
## Attaching package: 'patchwork'
## The following object is masked from 'package:cowplot':
## 
##     align_plots
# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
#DefaultAssay(integrated.data) <- "integrated"

Scale integrated data, run PCA and plot via UMAP

scale.integrated.data <- ScaleData(integrated.data, verbose = TRUE)
PCA.integrated.data<- RunPCA(scale.integrated.data, npcs = 30, verbose = TRUE)
UMAP.integrated.data <- RunUMAP(PCA.integrated.data, reduction = "pca", dims = 1:30)
#saveRDS(UMAP.integrated.data, file = "/Users/paigehalas/Desktop/UMAP.integrated.data.rds")

Integration Visualization

UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/UMAP.integrated.data.rds")
DimPlot(UMAP.integrated.data, reduction = "umap", label = TRUE)
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

Feature plot for nFeature_RNA to compare PDX cells and Norm/BRCA cells

FeaturePlot(UMAP.integrated.data, features = "nFeature_RNA")

Recalculate the percent mito using the Seurat tutorial

#integrated.data <- load("Users/paigehalas/Desktop/integrated.data.rda")
DefaultAssay(UMAP.integrated.data) <- "RNA"
UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(UMAP.integrated.data, pattern = "^MT-")

Plot UMAP with the new percent mito

FeaturePlot(UMAP.integrated.data, features = "percent.mt")

Rescale the data to account for differences in n_Feature and percent mito

DefaultAssay(integrated.data) <- "integrated"
r.scale.integrated.data <- ScaleData(integrated.data,vars.to.regress=c("nFeature_RNA", "percent.mt"))
r.PCA.integrated.data<- RunPCA(r.scale.integrated.data, npcs = 30, verbose = TRUE)
r.UMAP.integrated.data <- RunUMAP(r.PCA.integrated.data, reduction = "pca", dims = 1:30)
saveRDS(r.UMAP.integrated.data, file = "/Users/paigehalas/Desktop/r.UMAP.integrated.data.rds")

Look at the UMAP and feature plots for n_Feature and percent mito regressed out

r.UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/r.UMAP.integrated.data.rds")
DimPlot(r.UMAP.integrated.data, reduction = "umap", label = TRUE)

Calculate percent mito for regressed out data

DefaultAssay(r.UMAP.integrated.data) <- "RNA"
r.UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(r.UMAP.integrated.data, pattern = "^MT-")

Feature plot of regressed out features and mito for RNA Features

FeaturePlot(r.UMAP.integrated.data, features = "nFeature_RNA")

Feature plot of regressed out features and mito for mito

FeaturePlot(r.UMAP.integrated.data, features = "percent.mt")

Integration with downsampling 10X Dataset

Load in seurat objects

Look at the Counts for Each Object

Split the Norm/BRCA by Compartment and Individual

Downsample NORM/BRCA datasets to have a similar number of cells as PDX samples

ds.norm.brca.epithelial.individual.1.objects <- subset(norm.brca.epithelial.individual.1.objects, downsample = 373)
ds.norm.brca.epithelial.individual.2.objects <- subset(norm.brca.epithelial.individual.2.objects, downsample = 373)
ds.norm.brca.epithelial.individual.3.objects <- subset(norm.brca.epithelial.individual.3.objects, downsample = 373)
ds.norm.brca.epithelial.individual.4.objects <- subset(norm.brca.epithelial.individual.4.objects, downsample = 373)
ds.norm.brca.epithelial.individual.9.objects <- subset(norm.brca.epithelial.individual.9.objects, downsample = 373)
ds.norm.brca.epithelial.individual.10.objects <- subset(norm.brca.epithelial.individual.10.objects, downsample = 373)

Generate a list of Seurat objects (PDX models and Norm/BRCA Individuals–downsampled to 373) to perform the standard workflow integration on

#To construct a reference, we will identify ‘anchors’ between the individual datasets. First, we combine each Seurat object into a list, with each dataset as an element.
#The list() function groups elements together in the form of list("X1"=Y1,"X2",Y2,...), where Xn is the name you want the list element to be called and Yn is the component you want added to the list. For this analysis, we want to keep track of which Seurat object belongs to which model/patient.
#first part 
standard.workflow.object.list <- 
#list("hci001"=p01.cc.updated,"hci002"=p02.updated,"hci010"=p10.updated)
list("hci001"=p01.counts,"hci002"=p02.counts,"hci010"=p10.counts,"n_patient1"=ds.norm.brca.epithelial.individual.1.objects, "b_patient2"=ds.norm.brca.epithelial.individual.2.objects, "b_patient3"=ds.norm.brca.epithelial.individual.3.objects, "b_patient4" = ds.norm.brca.epithelial.individual.4.objects, "n_patient9" = ds.norm.brca.epithelial.individual.9.objects, "n_patient10"=ds.norm.brca.epithelial.individual.10.objects )

Preprocessing before finding anchors by normalizing the data and identifying variable features–downsampled

#Prior to finding anchors, we perform standard preprocessing (log-normalization), and identify variable features individually for each. Note that Seurat v3 implements an improved method for variable feature selection based on a variance stabilizing transformation ("vst")
for (i in 1:length(standard.workflow.object.list)) {
    standard.workflow.object.list[[i]] <- NormalizeData(standard.workflow.object.list[[i]], verbose = TRUE)
    standard.workflow.object.list[[i]] <- FindVariableFeatures(standard.workflow.object.list[[i]], selection.method = "vst", 
        nfeatures = 2000, verbose = TRUE)
}

Find the integration anchors for the PDX and Norm/BRCA datasets–downsampled

#Next we find anchors, which are pairwise correspondants between individual cells which originate from the same biological state. These anchors are then used to transfer infromation from one dataset to another
reference.list <- standard.workflow.object.list
integration.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)

Integration of Data using the defined anchors for downsampled data

#After 
#if hitting vector limit issue, open terminal, set cd to home directory, use "open .Renviron" and set the R_MAX_VSIZE=20Gb and save the file, you need to restart R for these changes to be saved 
ds.integrated.data <- IntegrateData(anchorset = integration.anchors, dims = 1:30)
save(ds.integrated.data, file = "/Users/paigehalas/Desktop/ds.integrated.data.Rda")

Load Integrated Dwonsampled Data

load("/Users/paigehalas/Desktop/ds.integrated.data.Rda")

Load Libraries for Visualization of Data

#load necessary packages for visualization 
library(ggplot2)
library(cowplot)
library(patchwork)

# switch to integrated assay. The variable features of this assay are automatically
# set during IntegrateData
#DefaultAssay(integrated.data) <- "integrated"

Scale downsampled integrated data, run PCA and plot via UMAP

scale.integrated.data <- ScaleData(ds.integrated.data, verbose = TRUE)
PCA.integrated.data<- RunPCA(scale.integrated.data, npcs = 30, verbose = TRUE)
UMAP.integrated.data <- RunUMAP(PCA.integrated.data, reduction = "pca", dims = 1:30)
#saveRDS(UMAP.integrated.data, file = "/Users/paigehalas/Desktop/ds.UMAP.integrated.data.rds")

Look at the UMAP and feature plots for downsampled integration

#ds.UMAP.integrated.data <- readRDS("/Users/paigehalas/Desktop/ds.UMAP.integrated.data.rds")
DimPlot(UMAP.integrated.data, reduction = "umap", label = TRUE)

Feature plot for nFeature_RNA to compare PDX cells and Norm/BRCA cells that are downsampled

FeaturePlot(UMAP.integrated.data, features = "nFeature_RNA")

Calculate percent mito for downsampled data

DefaultAssay(UMAP.integrated.data) <- "RNA"
UMAP.integrated.data[["percent.mt"]] <- PercentageFeatureSet(UMAP.integrated.data, pattern = "^MT-")

Feature plot for percent mito to compare PDX cells and Norm/BRCA cells that are downsampled

FeaturePlot(UMAP.integrated.data, features = "percent.mito")

Define cluster markers

#markers.RNA <- FindAllMarkers(UMAP.integrated.data, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, assay = "RNA")
#write.table(markers, file = "/Users/paigehalas/Desktop/norm.brca.pdx.std.integrated.res025.RNA.markers.RNA.txt")
#all.genes<-rownames(integrated.data@assays$RNA)
#integrated.data<-ScaleData(integrated.data,features = all.genes,assay = "RNA")
#save(integrated.data.g, file = "/Users/paigehalas/Desktop/integrated.data.all.genes.scale.rda")

Look at top 10 genes for RNA assay

#load("/Users/paigehalas/Desktop/integrated.data.all.genes.scale.rda")
#markers.RNA <- read.table("/Users/paigehalas/Desktop/norm.brca.pdx.std.integrated.res025.RNA.markers.RNA.txt")
#top10 <- markers.RNA %>% group_by(cluster) %>% top_n(n = 10, wt = avg_logFC)
#top10 <- as.character(top10$gene)
#DoHeatmap(integrated.data, features = top10$gene, assay = "RNA") + NoLegend()

Label Transfer

Import Objects

R Label Transfer Prep Object Input

#Load TPM Seurat Objects to Transfer MetaData
load("seurat-objects/hci001.seurat3.object.Rda")
load("seurat-objects/hci002.seurat3.object.Rda")
load("seurat-objects/hci010.seurat3.object.Rda")

Label Transfer Reorder Metadata

#Reorder HCI001 Meta Data
p01.cc.updated[["cell.names"]]<-rownames(p01.cc.updated@meta.data)
p01.cc.updated@meta.data<-arrange(p01.cc.updated@meta.data, cell.names)
rownames(p01.cc.updated@meta.data)<-as.character(p01.cc.updated$cell.names)

p01.counts[["cell.names"]]<-rownames(p01.counts@meta.data)
p01.counts@meta.data<-arrange(p01.counts@meta.data, cell.names)
rownames(p01.counts@meta.data)<-as.character(p01.counts$cell.names)

#Add HCI001 Meta Data to Counts Seurat Object
p01.counts[["mouse"]]<-p01.cc.updated$mouse
p01.counts[["burden"]]<-p01.cc.updated$burden
p01.counts[["tissue"]]<-p01.cc.updated$tissue
p01.counts[["paper.ident"]]<-p01.cc.updated$paper.ident
p01.counts[["pdx.surgery.type"]]<-p01.cc.updated$pdx.surgery.type
p01.counts[["pdx.surgical.side"]]<-p01.cc.updated$pdx.surgical.side

#Reorder HCI002 Meta Data
p02.updated[["cell.names"]]<-rownames(p02.updated@meta.data)
p02.updated@meta.data<-arrange(p02.updated@meta.data, cell.names)
rownames(p02.updated@meta.data)<-as.character(p02.updated$cell.names)

p02.counts[["cell.names"]]<-rownames(p02.counts@meta.data)
p02.counts@meta.data<-arrange(p02.counts@meta.data, cell.names)
rownames(p02.counts@meta.data)<-as.character(p02.counts$cell.names)

#Add HCI002 Meta Data to Counts Seurat Object
p02.counts[["mouse"]]<-p02.updated$mouse
p02.counts[["burden"]]<-p02.updated$burden
p02.counts[["tissue"]]<-p02.updated$tissue
p02.counts[["paper.ident"]]<-p02.updated$paper.ident
p02.counts[["pdx.surgery.type"]]<-p02.updated$pdx.surgery.type
p02.counts[["pdx.surgical.side"]]<-p02.updated$pdx.surgical.side

#Reorder HCI010 Meta Data
p10.updated[["cell.names"]]<-rownames(p10.updated@meta.data)
p10.updated@meta.data<-arrange(p10.updated@meta.data, cell.names)
rownames(p10.updated@meta.data)<-as.character(p10.updated$cell.names)

p10.counts[["cell.names"]]<-rownames(p10.counts@meta.data)
p10.counts@meta.data<-arrange(p10.counts@meta.data, cell.names)
rownames(p10.counts@meta.data)<-as.character(p10.counts$cell.names)

#Add HCI010 Meta Data to Counts Seurat Object
p10.counts[["mouse"]]<-p10.updated$mouse
p10.counts[["burden"]]<-p10.updated$burden
p10.counts[["tissue"]]<-p10.updated$tissue
p10.counts[["paper.ident"]]<-p10.updated$paper.ident
p10.counts[["pdx.surgery.type"]]<-p10.updated$pdx.surgery.type
p10.counts[["pdx.surgical.side"]]<-p10.updated$pdx.surgical.side

Label Transfer Calculation (HCI001)

Label Transfer Calculation (HCI002)

Label Transfer Calculation (HCI010)

HCI001 Visualize Label Transfer

library(pheatmap)
library(RColorBrewer)
library(dendsort)
library(reshape2)
## Using tissue, paper.ident, predicted.id, cell.names as id variables

HCI002 Visualization Label Transfer

## Using tissue, paper.ident, predicted.id, cell.names as id variables

HCI010 Vizualization of Label Transfer

## Using tissue, paper.ident, predicted.id, cell.names as id variables

Adding columns for dataset types

Adding cell types to the dataset